Obj 2: Modelling Indgenous forests and landscape structure effects on fire-relted diseases
Data Loading and Preprocessing
Load temporal health data and coordinates, merge and clean the datasets
coord<- read.csv("data/raw/coordinates.csv", header = T) %>% dplyr::select(-FID)
data<- read.csv("data/processed/combined_data.csv", header = T)
fire.data <- data %>%
dplyr::select(country, COD, year, fire_related, pop, IDH, for_PD, for_ED, for_AI,
FS_PLAND, tot_IT, NOTackn_IT, acknlgd_IT, FS_noIT, pm25_SUM) %>%
left_join(coord, by = "COD", relationship = "many-to-many") %>%
# Removing NAs from the dataset
drop_na(fire_related) %>%
# Removing negative values of for_noIT from the dataset
filter(FS_noIT > 0.00) %>%
# fire variables
dplyr::rename("fire.cases"= fire_related) %>%
mutate(fire.incidence= (fire.cases * pop)/100000)Data Overview and Distributions
Summary statistics and structure for dataset
| Name | fire.data |
| Number of rows | 18017 |
| Number of columns | 18 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| numeric | 16 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| country | 0 | 1 | 4 | 8 | 0 | 6 | 0 |
| COD | 0 | 1 | 2 | 10 | 0 | 1139 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| year | 0 | 1 | 2.011440e+03 | 4.990000e+00 | 2001.00 | 2008.00 | 2012.00 | 2.016000e+03 | 2.019000e+03 | ▃▅▆▇▇ |
| fire.cases | 0 | 1 | 2.357340e+03 | 6.294000e+03 | 0.00 | 80.00 | 320.00 | 1.109000e+03 | 3.485400e+04 | ▇▁▁▁▁ |
| pop | 0 | 1 | 5.917579e+04 | 1.462042e+05 | 180.00 | 5949.42 | 13098.98 | 2.969226e+04 | 2.021703e+06 | ▇▁▁▁▁ |
| IDH | 0 | 1 | 7.200000e-01 | 4.000000e-02 | 0.63 | 0.69 | 0.72 | 7.600000e-01 | 7.800000e-01 | ▂▃▅▃▇ |
| for_PD | 0 | 1 | 7.800000e-01 | 7.700000e-01 | 0.00 | 0.20 | 0.58 | 1.090000e+00 | 6.480000e+00 | ▇▂▁▁▁ |
| for_ED | 0 | 1 | 2.241000e+01 | 1.698000e+01 | 0.00 | 8.83 | 18.53 | 3.246000e+01 | 9.568000e+01 | ▇▅▂▁▁ |
| for_AI | 0 | 1 | 9.100000e+01 | 1.139000e+01 | 0.00 | 89.11 | 95.51 | 9.839000e+01 | 1.000000e+02 | ▁▁▁▁▇ |
| FS_PLAND | 0 | 1 | 5.505000e+01 | 3.118000e+01 | 0.00 | 27.96 | 57.08 | 8.544000e+01 | 9.981000e+01 | ▅▅▅▅▇ |
| tot_IT | 0 | 1 | 1.101000e+01 | 1.966000e+01 | 0.00 | 0.00 | 0.03 | 1.368000e+01 | 9.808000e+01 | ▇▁▁▁▁ |
| NOTackn_IT | 0 | 1 | 1.500000e+00 | 6.370000e+00 | 0.00 | 0.00 | 0.00 | 0.000000e+00 | 7.742000e+01 | ▇▁▁▁▁ |
| acknlgd_IT | 0 | 1 | 9.510000e+00 | 1.768000e+01 | 0.00 | 0.00 | 0.00 | 1.106000e+01 | 9.808000e+01 | ▇▁▁▁▁ |
| FS_noIT | 0 | 1 | 4.404000e+01 | 2.866000e+01 | 0.00 | 19.93 | 41.94 | 6.759000e+01 | 9.981000e+01 | ▇▇▇▅▅ |
| pm25_SUM | 0 | 1 | 8.618877e+10 | 1.967173e+11 | 16807229.50 | 2088213703.25 | 9685973380.00 | 5.414608e+10 | 1.926057e+12 | ▇▁▁▁▁ |
| X | 0 | 1 | -6.574621e+05 | 1.134267e+06 | -2172880.00 | -1699250.00 | -792351.00 | 3.406460e+05 | 1.740920e+06 | ▇▃▃▃▂ |
| Y | 0 | 1 | 2.732476e+06 | 7.070125e+05 | 1371160.00 | 2194820.00 | 2778030.00 | 3.213150e+06 | 4.274770e+06 | ▃▆▇▅▂ |
| fire.incidence | 0 | 1 | 9.068220e+03 | 3.289571e+04 | 0.00 | 6.20 | 35.00 | 1.867400e+02 | 2.182335e+05 | ▇▁▁▁▁ |
Visualize the distribution of the response variables
Predictor Correlations and Visualization
Plot pairwise relationships between predictors, focusing on key predictors
# Custom function to visualize binned correlations
my_bin <- function(data, mapping, ..., low = "#132B43", high = "#56B1F7") {
ggplot(data = data, mapping = mapping) +
geom_bin2d(...) + # Binned 2D histogram
scale_fill_gradient(low = low, high = high) # Custom color gradient
}
ggpairs(
fire.data,
columns = c("tot_IT", "FS_PLAND", "FS_noIT", "for_ED", "for_AI", "for_PD"),
lower = list(
combo = wrap("facethist", binwidth = 1), # Use faceted histograms for lower panels
continuous = wrap(my_bin, binwidth = c(5, 0.5), high = "red") # Customized binned 2D histograms
)
)# # Examine correlation coefficients between key variables to identify multicollinearity
# cor(fire.data$tot_IT, fire.data$FS_noIT)
# cor(fire.data$tot_IT, fire.data$for_ED)
# cor(fire.data$tot_IT, fire.data$for_AI)
# cor(fire.data$tot_IT, fire.data$for_PD)
# cor(poly(fire.data$tot_IT), fire.data$FS_PLAND) # Example of using polynomial terms
#
# cor(poly(fire.data$FS_PLAND), fire.data$for_ED) # 0.2253946
# cor(poly(fire.data$FS_PLAND), fire.data$for_AI) # 0.715645
# cor(poly(fire.data$FS_PLAND), fire.data$for_PD) # 0.2195011
# cor(poly(fire.data$FS_PLAND), fire.data$for_ED) # Example of using polynomial terms
#
# cor(poly(fire.data$FS_noIT), fire.data$for_ED) # 0.02567527
# cor(poly(fire.data$FS_noIT), fire.data$for_AI) # 0.6303039
# cor(poly(fire.data$FS_noIT), fire.data$for_PD) # 0.3686856
# cor(poly(fire.data$FS_noIT), fire.data$for_ED) # Example of using polynomial terms
#
# cor(fire.data$for_ED, fire.data$for_PD) # 0.71855
# cor(fire.data$for_ED, fire.data$for_AI) # 0.3934413
# cor(fire.data$for_PD, fire.data$for_AI) # 0.06931937Model Fitting and Selection
Explore a global model with multiple predictors and interactions
Perform model selection using dredge function to rank models based on AIC
Check gam model and concurvity (analogous to multicollinearity in linear models)
This checks how well each predictor (or smooth term) can be predicted by the other predictors. A concurvity value close to 1 indicates high redundancy, suggesting collinearity among terms.
Reference: Concurvity assessment is based on Noam Ross’s “GAMs in R” tutorial, Chapter 2, Section 10 “If the concurvity value is above 0.8, it is recommended to inspect the model more carefully for potential redundancy issues.”
## $worst
## para s(year) s(X,Y)
## para 1.000000e+00 9.999938e-01 6.981556e-23
## s(year) 9.999938e-01 1.000000e+00 5.675326e-07
## s(X,Y) 6.971286e-23 5.675326e-07 1.000000e+00
##
## $observed
## para s(year) s(X,Y)
## para 1.000000e+00 9.999938e-01 7.502860e-25
## s(year) 9.999938e-01 1.000000e+00 1.646178e-07
## s(X,Y) 6.971286e-23 5.675326e-07 1.000000e+00
##
## $estimate
## para s(year) s(X,Y)
## para 1.000000e+00 9.999938e-01 1.243090e-25
## s(year) 9.999938e-01 1.000000e+00 8.971313e-08
## s(X,Y) 6.971286e-23 5.675326e-07 1.000000e+00
This function checks for possible issues with the model, including residual patterns, quantile plots, and other diagnostics.
##
## Method: GCV Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [1.535614e-08,3.471428e-08]
## (score 0.2747764 & scale 0.2571278).
## eigenvalue range [-3.47127e-08,2.976674e-06].
## Model rank = 38 / 38
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(year) 1.00e+00 1.50e-05 0.94 0.015 *
## s(X,Y) 2.90e+01 2.89e+01 0.20 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Still, vif calculation with GAMs are not straightforward, besides even harder interpretation when using polynomial terms, so we will refit to test in regular glmms.
Refitting Simplified Models and Generating Predictions
mglm <- gam(log10(fire.incidence + 2) ~ scale(FS_noIT) + I(scale(FS_noIT)^2) +
(scale(tot_IT) + I(scale(tot_IT)^2)) * scale(for_PD) +
offset(IDH) + s(year, bs = "re") + s(X, Y),
family = Gamma(link = "log"), data = fire.data)
vif(mglm)## GVIF Df GVIF^(1/(2*Df))
## scale(FS_noIT) 1.248681e+60 0 Inf
## I(scale(FS_noIT)^2) 1.248681e+60 0 Inf
## scale(tot_IT) 1.248681e+60 0 Inf
## I(scale(tot_IT)^2) 1.248681e+60 0 Inf
## scale(for_PD) 1.248681e+60 0 Inf
## year 1.248681e+60 0 Inf
## X 1.248681e+60 0 Inf
## Y 1.248681e+60 0 Inf
## scale(tot_IT):scale(for_PD) 1.248681e+60 0 Inf
## I(scale(tot_IT)^2):scale(for_PD) 1.248681e+60 0 Inf
Modelling
Generally, we build GAM spatial models contain random effects for year and geographical coordinates. For fire related diseases incidence we use gamma family of distribution and log link function.
Absence of effect (aka Baseline Model, ‘null’)
Fit a model of absence of effect with only an intercept and random effects for year and spatial coordinates. This model serves as a baseline to compare against models with predictors.
Single-Predictor Models
Fit models with single predictors for different landscape variables and Indigenous Territories. Here, we first test both linear and polynomial (quadratic) terms to determine which is more parsimonious.
# Model for forest cover within the entire COD/municipality (FS_PLAND)
# Linear model for FS_PLAND (commented out as it was not selected)
# mfire_FS <- gam(log10(fire.incidence + 2) ~ scale(FS_PLAND) + offset(IDH) + s(year, bs = "re") + s(X,Y), family= Gamma(link = "log"), data = fire.data)
# Polynomial model (second degree) for FS_PLAND
mfire_FS2 <- gam(log10(fire.incidence + 2) ~ poly(scale(FS_PLAND), 2) +
offset(IDH) + s(year, bs = "re") + s(X, Y),
family = Gamma(link = "log"), data = fire.data)
# summary(mfire_FS2); AIC(mfire_FS2) # Display summary and AIC for the polynomial model
# Model for Indigenous Territories (tot_IT)
# Linear model for tot_IT (commented out as it was not selected)
# mfire_IT <- gam(log10(fire.incidence + 2) ~ scale(tot_IT) + offset(IDH) + s(year, bs = "re") + s(X,Y), family= Gamma(link = "log"), data = fire.data)
# Polynomial model (second degree) for tot_IT
mfire_IT2 <- gam(log10(fire.incidence + 2) ~ poly(scale(tot_IT), 2) +
offset(IDH) + s(year, bs = "re") + s(X, Y),
family = Gamma(link = "log"), data = fire.data)
# summary(mfire_IT2); AIC(mfire_IT2) # Display summary and AIC for the polynomial model
# Model for forest cover outside Indigenous Territories (FS_noIT)
# Linear model for FS_noIT (commented out as it was not selected)
# mfire_FSnoIT <- gam(log10(fire.incidence + 2) ~ scale(FS_noIT) + offset(IDH) + s(year, bs = "re") + s(X,Y), family= Gamma(link = "log"), data = fire.data)
# Polynomial model (second degree) for FS_noIT
mfire_FSnoIT2 <- gam(log10(fire.incidence + 2) ~ poly(scale(FS_noIT), 2) +
offset(IDH) + s(year, bs = "re") + s(X, Y),
family = Gamma(link = "log"), data = fire.data)
# summary(mfire_FSnoIT2); AIC(mfire_FSnoIT2) # Display summary and AIC for the polynomial modelAdditive Models with Multiple Predictors
Build additive models combining Indigenous Territories (tot_IT) and forest cover outside Indigenous Territories (FS_noIT)
# Additive model with polynomial terms for tot_IT and FS_noIT
mfire_ITFSno <- gam(log10(fire.incidence + 2) ~
poly(scale(tot_IT), 2) + poly(scale(FS_noIT), 2) +
offset(IDH) + s(year, bs = "re") + s(X, Y),
family = Gamma(link = "log"), data = fire.data)
# summary(mfire_ITFSno); AIC(mfire_ITFSno) # Display summary and AIC for the model
# Interactive models to explore interactions between predictors
mfire_ITFSno1i <- gam(log10(fire.incidence + 2) ~
poly(scale(tot_IT), 2) * scale(FS_noIT) +
offset(IDH) + s(year, bs = "re") + s(X, Y),
family = Gamma(link = "log"), data = fire.data)
# summary(mfire_ITFSno1i); AIC(mfire_ITFSno1i) # Display summary and AIC for interaction modelInteractive Models with Multiple Predictors
# interativos IT * forest (FS_noIT)
mfire_ITFSno1i <- gam(log10(fire.incidence + 2) ~
poly(scale(tot_IT),2) * scale(FS_noIT) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_ITFSno1i); AIC(mfire_ITFSno1i) # 24290.42
mfire_IT1FSnoi <- gam(log10(fire.incidence + 2) ~
scale(tot_IT) * poly(scale(FS_noIT, 2)) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_IT1FSnoi); AIC(mfire_IT1FSnoi) # 24308.9Introduct fragmentation effect
Single-Predictor Models for Landscape Configuration Variables (e.g., for_ED and for_PD)
# Model for forest edge density (for_ED)
mfire_ED <- gam(log10(fire.incidence + 2) ~ scale(for_ED) +
offset(IDH) + s(year, bs = "re") + s(X, Y),
family = Gamma(link = "log"), data = fire.data)
# summary(mfire_ED); AIC(mfire_ED) # Display summary and AIC for for_ED model
# Model for patch density (for_PD)
mfire_PD <- gam(log10(fire.incidence + 2) ~ scale(for_PD) +
offset(IDH) + s(year, bs = "re") + s(X, Y),
family = Gamma(link = "log"), data = fire.data)
# summary(mfire_PD); AIC(mfire_PD) # Display summary and AIC for for_PD modelLandscape structure: forest cover and frag
Build models combining landscape cover (FS_PLAND) and configuration (for_ED and for_PD)
# Additive model combining FS_PLAND and for_ED
mfire_FSED <- gam(log10(fire.incidence + 2) ~ poly(scale(FS_PLAND), 2) + scale(for_ED) +
offset(IDH) + s(year, bs = "re") + s(X, Y),
family = Gamma(link = "log"), data = fire.data)
# summary(mfire_FSED); AIC(mfire_FSED) # Display summary and AIC for the model
# Additive model combining FS_PLAND and for_PD
mfire_FSPD <- gam(log10(fire.incidence + 2) ~ poly(scale(FS_PLAND), 2) + scale(for_PD) +
offset(IDH) + s(year, bs = "re") + s(X, Y),
family = Gamma(link = "log"), data = fire.data)
# summary(mfire_FSPD); AIC(mfire_FSPD) # Display summary and AIC for the modelITs and configuration
mfire_TIED <- gam(log10(fire.incidence + 2) ~ poly(scale(tot_IT),2) + scale(for_ED) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_TIED); AIC(mfire_TIED) # 24272.48
mfire_TIPD <- gam(log10(fire.incidence + 2) ~ poly(scale(tot_IT),2) + scale(for_PD) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_TIPD); AIC(mfire_TIPD) # 24118.99Tripple additives IT + forests + frags
mfire_ITFSED <- gam(log10(fire.incidence + 2) ~ poly(scale(tot_IT),2) +
poly(scale(FS_noIT),2) + scale(for_ED) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_ITFSED); AIC(mfire_ITFSED) # 24083.9
mfire_ITFSPD <- gam(log10(fire.incidence + 2) ~ poly(scale(tot_IT),2) +
poly(scale(FS_noIT),2) + scale(for_PD) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_ITFSPD); AIC(mfire_ITFSPD) # 23942.26Interactive doubles
## interactive landscape structure
mfire_FSEDi <- gam(log10(fire.incidence + 2) ~
poly(scale(FS_PLAND),2) * scale(for_ED) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_FSEDi); AIC(mfire_FSEDi) # 24040.44
mfire_FSPDi <- gam(log10(fire.incidence + 2) ~
poly(scale(FS_PLAND),2) * scale(for_PD) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_FSPDi); AIC(mfire_FSPDi) # 23958.512x Interactive TI frags
mfire_TIEDi <- gam(log10(fire.incidence + 2) ~
poly(scale(tot_IT),2) * scale(for_ED) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_TIEDi); AIC(mfire_TIEDi) # 24230.41
mfire_TIPDi <- gam(log10(fire.incidence + 2) ~
poly(scale(tot_IT),2) * scale(for_PD) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_TIPDi); AIC(mfire_TIPDi) # 24092.692x Interactive + singel terms:
IT + interactive landscape structure
mfire_ITiFSED <- gam(log10(fire.incidence + 2) ~ poly(scale(tot_IT),2) +
poly(scale(FS_noIT),2) * scale(for_ED) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_ITiFSED); AIC(mfire_ITiFSED) # 24068.79
mfire_ITiFSPD <- gam(log10(fire.incidence + 2) ~ poly(scale(tot_IT),2) +
poly(scale(FS_noIT),2) * scale(for_PD) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_ITiFSPD); AIC(mfire_ITiFSPD) # 23927.62Landscape cover + interactive IT * landscape configuration
mfire_FSiTIED <- gam(log10(fire.incidence + 2) ~ poly(scale(FS_noIT),2) +
poly(scale(tot_IT),2) * scale(for_ED) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_FSiTIED); AIC(mfire_FSiTIED) # 23981.11
mfire_FSiTIPD <- gam(log10(fire.incidence + 2) ~ poly(scale(FS_noIT),2) +
poly(scale(tot_IT),2) * scale(for_PD) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_FSiTIPD); AIC(mfire_FSiTIPD) # 23895.59Landscape configuration + interactive IT * landscape cover
mfire_EDiFSTI1 <- gam(log10(fire.incidence + 2) ~ poly(scale(FS_noIT),2) *scale(tot_IT) +
scale(for_ED) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_EDiFSTI1); AIC(mfire_EDiFSTI1) # 240048.01
mfire_EDiFS1TI <- gam(log10(fire.incidence + 2) ~ scale(FS_noIT) *poly(scale(tot_IT),2) +
scale(for_ED) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_EDiFS1TI); AIC(mfire_EDiFS1TI) # 24249.01
mfire_PDiFSTI1 <- gam(log10(fire.incidence + 2) ~ poly(scale(FS_noIT),2) * scale(tot_IT) +
scale(for_PD) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_PDiFSTI1); AIC(mfire_PDiFSTI1) # 23910.84
mfire_PDiFS1TI <- gam(log10(fire.incidence + 2) ~ scale(FS_noIT) * poly(scale(tot_IT),2) +
scale(for_PD) +
offset(IDH) + s(year, bs = "re") + s(X,Y),
family= Gamma(link = "log"), data = fire.data)
# summary(mfire_PDiFS1TI); AIC(mfire_PDiFS1TI) # 24105.58Model Selection Using AIC Comparison
Perform model selection by comparing all models based on AIC using AICtab function
(mod.sel <- AICtab(mfire0,
# FSs
mfire_FS2,
# ITs
mfire_IT2,
# FSs outside IT
mfire_FSnoIT2,
# additives IT + forest (PLAND and FS_noIT)
mfire_ITFSno,
# interativos IT * forest (FS_noIT)
mfire_IT1FSnoi,
mfire_ITFSno1i,
# frags solitos
mfire_ED, mfire_PD,
# landscape structure: forest cover and frag
mfire_FSED, mfire_FSPD,
# additive TI frags
mfire_TIED, mfire_TIPD,
# interactive doubles
mfire_FSEDi, mfire_FSPDi, ## interactive landscape structure
mfire_TIEDi, mfire_TIPDi,## interactive TI frags
## tipple additives IT + forests + frags
mfire_ITFSED, mfire_ITFSPD,
# triple terms aad + interactive:
mfire_ITiFSED, mfire_ITiFSPD, # IT + interactive landscape structure
mfire_FSiTIED, mfire_FSiTIPD, # cover + IT*frag
mfire_EDiFSTI1, mfire_EDiFS1TI, mfire_PDiFSTI1, mfire_PDiFS1TI, # frag + cover*TI
## tipple interactive: IT * forests * frags
# mfire_ITFSEDi, mfire_ITFSPDi,
base = TRUE, weights = TRUE)) # Include null model and calculate weights## AIC dAIC df weight
## mfire_FSiTIPD 40844.9 0.0 37.9 1
## mfire_ITFSPD 40863.9 19.0 35.9 <0.001
## mfire_ITiFSPD 40867.7 22.8 37.9 <0.001
## mfire_PDiFSTI1 40883.9 39.0 36.9 <0.001
## mfire_ITiFSED 40928.8 83.9 37.9 <0.001
## mfire_FSPDi 40929.5 84.6 35.9 <0.001
## mfire_FSiTIED 40948.2 103.3 37.9 <0.001
## mfire_FSPD 40951.0 106.1 33.9 <0.001
## mfire_TIPDi 40954.1 109.2 35.9 <0.001
## mfire_PDiFS1TI 40977.9 133.0 36.9 <0.001
## mfire_TIPD 40980.0 135.1 33.9 <0.001
## mfire_ITFSED 40983.7 138.8 35.9 <0.001
## mfire_FSEDi 40999.8 154.9 35.9 <0.001
## mfire_EDiFSTI1 41009.6 164.7 36.9 <0.001
## mfire_PD 41045.4 200.5 31.9 <0.001
## mfire_FSED 41058.6 213.7 33.9 <0.001
## mfire_TIEDi 41066.8 221.9 35.9 <0.001
## mfire_ITFSno 41073.3 228.4 34.9 <0.001
## mfire_EDiFS1TI 41087.1 242.2 36.9 <0.001
## mfire_TIED 41089.2 244.3 33.9 <0.001
## mfire_ITFSno1i 41106.2 261.3 35.9 <0.001
## mfire_IT2 41111.0 266.1 32.9 <0.001
## mfire_IT1FSnoi 41121.9 277.0 33.9 <0.001
## mfire_FSnoIT2 41140.8 295.9 32.9 <0.001
## mfire_FS2 41177.9 333.0 32.9 <0.001
## mfire_ED 41178.2 333.3 31.9 <0.001
## mfire0 41208.2 363.4 30.9 <0.001
Top selected model output
Summarize the selected model to examine the fitted results
##
## Family: Gamma
## Link function: log
##
## Formula:
## log10(fire.incidence + 2) ~ poly(scale(FS_noIT), 2) + poly(scale(tot_IT),
## 2) * scale(for_PD) + offset(IDH) + s(year, bs = "re") + s(X,
## Y)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.247485 0.007795 -31.749 < 2e-16 ***
## poly(scale(FS_noIT), 2)1 -4.324751 0.863584 -5.008 5.55e-07 ***
## poly(scale(FS_noIT), 2)2 -7.629133 0.698142 -10.928 < 2e-16 ***
## poly(scale(tot_IT), 2)1 -5.746103 1.212839 -4.738 2.18e-06 ***
## poly(scale(tot_IT), 2)2 -7.037328 1.024674 -6.868 6.73e-12 ***
## scale(for_PD) -0.087381 0.006622 -13.196 < 2e-16 ***
## poly(scale(tot_IT), 2)1:scale(for_PD) -3.192288 1.529962 -2.087 0.0369 *
## poly(scale(tot_IT), 2)2:scale(for_PD) -5.583778 1.113081 -5.017 5.31e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(year) 1.506e-05 1 0.001 <2e-16 ***
## s(X,Y) 2.890e+01 29 391.863 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.6 Deviance explained = 42.9%
## GCV = 0.27324 Scale est. = 0.25388 n = 18017
Tidy up the model summary to get a clean output of the parametric terms
## # A tibble: 8 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -0.247 0.00780 -31.7 2.79e-215
## 2 poly(scale(FS_noIT), 2)1 -4.32 0.864 -5.01 5.55e- 7
## 3 poly(scale(FS_noIT), 2)2 -7.63 0.698 -10.9 1.04e- 27
## 4 poly(scale(tot_IT), 2)1 -5.75 1.21 -4.74 2.18e- 6
## 5 poly(scale(tot_IT), 2)2 -7.04 1.02 -6.87 6.73e- 12
## 6 scale(for_PD) -0.0874 0.00662 -13.2 1.41e- 39
## 7 poly(scale(tot_IT), 2)1:scale(for_PD) -3.19 1.53 -2.09 3.69e- 2
## 8 poly(scale(tot_IT), 2)2:scale(for_PD) -5.58 1.11 -5.02 5.31e- 7
Model Diagnostics
Concurvity
Check analogous to multicollinearity in linear models tests
## $worst
## para s(year) s(X,Y)
## para 1.000000e+00 9.999938e-01 6.981556e-23
## s(year) 9.999938e-01 1.000000e+00 5.675326e-07
## s(X,Y) 6.994551e-23 5.675326e-07 1.000000e+00
##
## $observed
## para s(year) s(X,Y)
## para 1.000000e+00 9.999938e-01 6.187055e-25
## s(year) 9.999938e-01 1.000000e+00 1.773809e-07
## s(X,Y) 6.994551e-23 5.675326e-07 1.000000e+00
##
## $estimate
## para s(year) s(X,Y)
## para 1.000000e+00 9.999938e-01 1.243090e-25
## s(year) 9.999938e-01 1.000000e+00 8.971313e-08
## s(X,Y) 6.994551e-23 5.675326e-07 1.000000e+00
Model check
Run additional diagnostic checks for model assumptions using gam.check()
##
## Method: GCV Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [1.717335e-08,3.511691e-08]
## (score 0.2732444 & scale 0.2538832).
## eigenvalue range [-3.511531e-08,2.89316e-06].
## Model rank = 38 / 38
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(year) 1.00e+00 1.51e-05 0.94 0.03 *
## s(X,Y) 2.90e+01 2.89e+01 0.19 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Overdispersion
Test for overdispersion in the residuals using DHARMa’s testDispersion()
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.55373, p-value < 2.2e-16
## alternative hypothesis: two.sided
Test and plot residuals
Generate residual diagnostic plots to visually inspect residuals and validate model assumptions. Creates diagnostic plots such as residual vs. fitted plots, QQ plots, and others for more in-depth inspection.
## $uniformity
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.042531, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.55373, p-value < 2.2e-16
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test with approximate
## expectations
##
## data: simulationOutput
## outliers at both margin(s) = 168, observations = 18017, p-value =
## 0.04415
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
## 0.007973005 0.010837772
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 )
## 0.009324527
## $uniformity
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: simulationOutput$scaledResiduals
## D = 0.042531, p-value < 2.2e-16
## alternative hypothesis: two-sided
##
##
## $dispersion
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 0.55373, p-value < 2.2e-16
## alternative hypothesis: two.sided
##
##
## $outliers
##
## DHARMa outlier test based on exact binomial test with approximate
## expectations
##
## data: simulationOutput
## outliers at both margin(s) = 168, observations = 18017, p-value =
## 0.04415
## alternative hypothesis: true probability of success is not equal to 0.007968127
## 95 percent confidence interval:
## 0.007973005 0.010837772
## sample estimates:
## frequency of outliers (expected: 0.00796812749003984 )
## 0.009324527
Prediction plot
We originally use orthogonal polynomials
(i.e. poly(predictor_ _variable, 2)) for model selection
due to their statistical advantages, which often leads to more reliable
statistical inference. So model selection was based on orthogonal
polynomials (poly(x, 2)) for robustness, but for practical
visualization purpose, we use regular quadratic terms
(I(x^2)) for clarity. The overall predicted trends (i.e.,
the shapes of the response curves) produced by a model using orthogonal
polynomials (poly(x, 2)) and regular quadratic terms
(I(x^2)) are generally comparable. Both models capture the
quadratic relationships in the data.
mfire_FSiTIPD_quad <- gam(log10(fire.incidence + 2) ~
scale(FS_noIT) + I(scale(FS_noIT)^2) +
(scale(tot_IT) + I(scale(tot_IT)^2)) * scale(for_PD) +
offset(IDH) + s(year, bs = "re") + s(X, Y),
family = Gamma(link = "log"), data = fire.data)Plot predicted effects of Indigenous Territories (tot_IT) on fire incidence at different levels of patch density (for_PD)
# Generate predictions for the fitted model at different levels of predictors. Predictions are made for a range of values for each predictor; 'all' indicates the full range.
preds <- ggpredict(mfire_FSiTIPD_quad, terms = c("tot_IT [all]", "for_PD", "FS_noIT"))
ggplot(preds, aes(x = x, y = predicted, color = group)) +
geom_line(linewidth = 1) + # Plot prediction lines
geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = group), alpha = 0.2) + # Add confidence intervals as ribbons
labs(
title = "Predicted Effect of IT ~ PD, on low, median and hight FS cover",
x = "IT percent",
y = "Predicted Fire Incidence",
color = "PD Levels",
fill = "PD Levels"
) +
scale_color_manual(values = c("#00441b", "#238b45", "#66c2a4")) +
scale_fill_manual(values = c("#00441b", "#238b45", "#66c2a4")) +
facet_wrap(~facet) # Facet plot by the 'facet' variable to display different groupsVisualize Two-by-Two Interactions
IT and PD
# Generate predictions for interactions between tot_IT and for_PD
preds_TI_PD <- ggpredict(mfire_FSiTIPD_quad, terms = c("tot_IT [all]", "for_PD"))
# Plot predicted effects for IT on Fire Incidence at different levels of PD
(plot_TI_PD <- ggplot(preds_TI_PD, aes(x = x, y = predicted, color = group)) +
geom_line(linewidth = 1) +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = group), alpha = 0.2) +
labs(
title = "Predicted Effect of IT on Fire Incidence at Different Levels of PD",
x = "IT percent",
y = "Predicted Fire Incidence",
color = "PD Levels",
fill = "PD Levels"
) +
scale_color_manual(values = c("#00441b", "#238b45", "#66c2a4")) +
scale_fill_manual(values = c("#00441b", "#238b45", "#66c2a4")))IT and FS cover
# Generate predictions for interactions between tot_IT and FS_noIT
preds_TI_FS <- ggpredict(mfire_FSiTIPD_quad, terms = c("tot_IT [all]", "FS_noIT"))
# Plot predicted effects for IT on Fire Incidence at different levels of FS_noIT
(plot_TI_FS <- ggplot(preds_TI_FS, aes(x = x, y = predicted, color = group)) +
geom_line(linewidth = 1) +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = group), alpha = 0.2) +
labs(
title = "Predicted Effect of IT on Fire Incidence at Different Levels of FS",
x = "IT percent",
y = "Predicted Fire Incidence",
color = "FS cover",
fill = "FS cover"
) +
scale_color_manual(values = c("#66c2a4", "#238b45", "#00441b")) +
scale_fill_manual(values = c("#66c2a4", "#238b45", "#00441b")))Plot Individual Predictor Effects
# Generate and plot predictions for single predictors
preds_PD <- ggpredict(mfire_FSiTIPD_quad, terms = "for_PD") # Predictions for PD
# Plot for PD
plot_PD <- ggplot(preds_PD, aes(x = x, y = predicted)) +
geom_line(linewidth = 1) +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2) +
labs(
x = "Patch density",
y = "Predicted Fire Incidence")
# Generate and plot predictions for tot_IT
preds_IT <- ggpredict(mfire_FSiTIPD_quad, terms = "tot_IT") # Predictions for PD
plot_IT <- ggplot(preds_IT, aes(x = x, y = predicted)) +
geom_line(linewidth = 1) +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2) +
labs(
x = "Extent of indigenous territories",
y = "Predicted Fire Incidence"
)
# Combine plots side by side using patchwork
(combined_plot <- plot_IT + plot_PD) # Combine IT and PD plots for side-by-side comparison